loading the libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(cluster)
library(viridis)
## Loading required package: viridisLite
library(leaflet)
library(mapview)
library(tmap)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(purrr)

loading the data

# Read the county-level demographics table from the serialized RDS file and
# preview its first rows.
demographics <- readr::read_rds("demographics.rds")
head(demographics)
## # A tibble: 6 x 22
##   GEOID county        state                  geometry medinc l_spanish l_russian
##   <chr> <chr>         <chr>        <MULTIPOLYGON [°]>  <dbl>     <dbl>     <dbl>
## 1 48081 Coke County   Texas (((-100.825 31.74941, -1~  25556     0.484   0      
## 2 48273 Kleberg Coun~ Texas (((-97.3178 27.49456, -9~  22844     0.491   0      
## 3 48203 Harrison Cou~ Texas (((-94.70215 32.45618, -~  26572     0.532   0.00367
## 4 48223 Hopkins Coun~ Texas (((-95.86333 33.04989, -~  26608     0.696   0.00374
## 5 48033 Borden County Texas (((-101.6913 32.96184, -~  35926     0.560   0      
## 6 48419 Shelby County Texas (((-94.51143 31.97398, -~  22658     0.903   0      
## # ... with 15 more variables: l_korean <dbl>, l_chinese <dbl>,
## #   l_vietnamese <dbl>, l_other_lang <dbl>, p_e_less <dbl>, p_foreign <dbl>,
## #   hispanic <dbl>, white_alone <dbl>, black_alone <dbl>,
## #   amercian_Indian_alone <dbl>, asian_alone <dbl>, pacific_alone <dbl>,
## #   other_race <dbl>, two_or_more_race <dbl>, cluster <dbl>

computing the distance matrix and clustering

# Fix the RNG seed so the random steps in this script are repeatable.
set.seed(23994)

# Columns used for the descriptive summaries further down: everything except
# the identifier and geometry columns. If the input file already carries a
# `cluster` column, it stays in `vars` (later chunks split on it).
vars <- demographics %>%
  select(-GEOID, -county, -state, -geometry) %>%
  colnames()

# Features actually used for clustering. A `cluster` column already present in
# the input is a label from an earlier run, not a demographic measurement, so
# it must not be fed back into the distance computation.
cluster_features <- setdiff(vars, "cluster")

# Pairwise dissimilarities between counties (daisy() defaults).
# NOTE(review): the columns are not standardised here; medinc is orders of
# magnitude larger than the proportion columns and probably dominates the
# distances -- consider `stand = TRUE` if that is not intended.
distance_matrix <- cluster::daisy(demographics[, cluster_features])

# Partition the counties into 5 clusters around medoids and store the label
# of each county (as a double) on the data.
res.pam <- cluster::pam(x = distance_matrix, k = 5, cluster.only = FALSE)
demographics$cluster <- as.double(res.pam$clustering)

# Show which cluster King County, Washington was assigned to.
demographics %>%
  filter(county == "King County", state == "Washington") %>%
  select(cluster)
## # A tibble: 1 x 1
##   cluster
##     <dbl>
## 1       4
# Look up the cluster of King County, Washington instead of hard-coding its
# label: cluster numbering can change whenever the data or the clustering
# parameters change.
target_cluster <- demographics %>%
  filter(county == "King County", state == "Washington") %>%
  pull(cluster)
# Fail loudly if the reference county is missing or duplicated.
stopifnot(length(target_cluster) == 1)

# Counties that share the reference county's cluster, with summary statistics
# of the analysis columns. all_of() makes it explicit that `vars` is an
# external character vector of column names.
demographics_match <- demographics %>% filter(cluster == target_cluster)
demographics_match %>%
  select(all_of(vars)) %>%
  summary()
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(vars)` instead of `vars` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
##      medinc        l_spanish        l_russian          l_korean        
##  Min.   :34758   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000000  
##  1st Qu.:36266   1st Qu.:0.5295   1st Qu.:0.01517   1st Qu.:0.0003992  
##  Median :38290   Median :0.6387   Median :0.04257   Median :0.0148234  
##  Mean   :39826   Mean   :0.6221   Mean   :0.07254   Mean   :0.0277206  
##  3rd Qu.:41251   3rd Qu.:0.7251   3rd Qu.:0.09629   3rd Qu.:0.0368839  
##  Max.   :70390   Max.   :2.7113   Max.   :0.50000   Max.   :0.4549791  
##    l_chinese        l_vietnamese       l_other_lang         p_e_less      
##  Min.   :0.00000   Min.   :0.000000   Min.   : 0.00000   Min.   :0.00000  
##  1st Qu.:0.01203   1st Qu.:0.001317   1st Qu.: 0.01973   1st Qu.:0.01426  
##  Median :0.04386   Median :0.015026   Median : 0.04196   Median :0.03125  
##  Mean   :0.08001   Mean   :0.031270   Mean   : 0.11733   Mean   :0.04719  
##  3rd Qu.:0.10086   3rd Qu.:0.040162   3rd Qu.: 0.09845   3rd Qu.:0.05821  
##  Max.   :1.14146   Max.   :0.259309   Max.   :10.18090   Max.   :0.24534  
##    p_foreign          hispanic         white_alone      black_alone     
##  Min.   :0.00000   Min.   :0.003497   Min.   :0.1659   Min.   :0.00000  
##  1st Qu.:0.03245   1st Qu.:0.032491   1st Qu.:0.7344   1st Qu.:0.01094  
##  Median :0.06664   Median :0.063184   Median :0.8650   Median :0.03521  
##  Mean   :0.09353   Mean   :0.097290   Mean   :0.8135   Mean   :0.06794  
##  3rd Qu.:0.12274   3rd Qu.:0.130582   3rd Qu.:0.9281   3rd Qu.:0.09667  
##  Max.   :0.43173   Max.   :0.688263   Max.   :0.9862   Max.   :0.62672  
##  amercian_Indian_alone  asian_alone      pacific_alone         other_race     
##  Min.   :0.000000      Min.   :0.00000   Min.   :0.000e+00   Min.   :0.00000  
##  1st Qu.:0.001752      1st Qu.:0.00934   1st Qu.:2.048e-05   1st Qu.:0.00652  
##  Median :0.003019      Median :0.02552   Median :3.713e-04   Median :0.01404  
##  Mean   :0.012330      Mean   :0.04548   Mean   :2.745e-03   Mean   :0.02624  
##  3rd Qu.:0.005844      3rd Qu.:0.05470   3rd Qu.:8.703e-04   3rd Qu.:0.03037  
##  Max.   :0.512357      Max.   :0.42657   Max.   :2.727e-01   Max.   :0.51620  
##  two_or_more_race      cluster 
##  Min.   :0.003073   Min.   :4  
##  1st Qu.:0.018742   1st Qu.:4  
##  Median :0.025988   Median :4  
##  Mean   :0.031794   Mean   :4  
##  3rd Qu.:0.036374   3rd Qu.:4  
##  Max.   :0.232220   Max.   :4

summary statistics of the variables within each cluster

# Summary statistics of the analysis columns, computed separately for each
# cluster. all_of() makes it explicit that `vars` is an external character
# vector of column names (a bare `vars` is ambiguous to tidyselect).
# The split below relies on `cluster` being one of the columns in `vars`.
data <- demographics %>% select(all_of(vars))
data %>%
  split(.$cluster) %>%
  map(summary)
## $`1`
##      medinc        l_spanish        l_russian           l_korean       
##  Min.   :22978   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:23919   1st Qu.:0.4725   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :24736   Median :0.6296   Median :0.004501   Median :0.000000  
##  Mean   :24625   Mean   :0.6402   Mean   :0.031919   Mean   :0.013699  
##  3rd Qu.:25364   3rd Qu.:0.7538   3rd Qu.:0.028705   3rd Qu.:0.009624  
##  Max.   :26032   Max.   :7.6667   Max.   :1.445652   Max.   :0.467391  
##    l_chinese         l_vietnamese      l_other_lang         p_e_less       
##  Min.   :0.000000   Min.   :0.00000   Min.   : 0.00000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.: 0.00000   1st Qu.:0.007435  
##  Median :0.003293   Median :0.00000   Median : 0.01269   Median :0.015863  
##  Mean   :0.031482   Mean   :0.01852   Mean   : 0.10947   Mean   :0.030518  
##  3rd Qu.:0.024718   3rd Qu.:0.01240   3rd Qu.: 0.04829   3rd Qu.:0.034176  
##  Max.   :1.205496   Max.   :1.56757   Max.   :11.44068   Max.   :0.346853  
##    p_foreign          hispanic          white_alone      black_alone      
##  Min.   :0.00000   Min.   :0.0002395   Min.   :0.1088   Min.   :0.000000  
##  1st Qu.:0.01221   1st Qu.:0.0207827   1st Qu.:0.7526   1st Qu.:0.007527  
##  Median :0.02374   Median :0.0384550   Median :0.8899   Median :0.026496  
##  Mean   :0.03907   Mean   :0.0927959   Mean   :0.8317   Mean   :0.089777  
##  3rd Qu.:0.04627   3rd Qu.:0.0967492   3rd Qu.:0.9470   3rd Qu.:0.120869  
##  Max.   :0.53720   Max.   :0.8256242   Max.   :0.9971   Max.   :0.726194  
##  amercian_Indian_alone  asian_alone       pacific_alone      
##  Min.   :0.000000      Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.:0.001737      1st Qu.:0.002523   1st Qu.:0.0000000  
##  Median :0.003939      Median :0.005141   Median :0.0001928  
##  Mean   :0.023894      Mean   :0.008023   Mean   :0.0007980  
##  3rd Qu.:0.009953      3rd Qu.:0.009396   3rd Qu.:0.0008423  
##  Max.   :0.839403      Max.   :0.103458   Max.   :0.0223422  
##    other_race       two_or_more_race     cluster 
##  Min.   :0.000000   Min.   :0.00000   Min.   :1  
##  1st Qu.:0.002973   1st Qu.:0.01388   1st Qu.:1  
##  Median :0.008294   Median :0.02035   Median :1  
##  Mean   :0.021147   Mean   :0.02464   Mean   :1  
##  3rd Qu.:0.022122   3rd Qu.:0.02939   3rd Qu.:1  
##  Max.   :0.335762   Max.   :0.17613   Max.   :1  
## 
## $`2`
##      medinc        l_spanish         l_russian          l_korean       
##  Min.   : 8641   Min.   : 0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:19688   1st Qu.: 0.4801   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :21188   Median : 0.6620   Median :0.00000   Median :0.000000  
##  Mean   :20609   Mean   : 0.7561   Mean   :0.02554   Mean   :0.015585  
##  3rd Qu.:22104   3rd Qu.: 0.8315   3rd Qu.:0.01105   3rd Qu.:0.004766  
##  Max.   :22957   Max.   :22.2500   Max.   :6.00000   Max.   :1.054545  
##    l_chinese        l_vietnamese       l_other_lang         p_e_less       
##  Min.   :0.00000   Min.   :0.000000   Min.   : 0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.: 0.00000   1st Qu.:0.005259  
##  Median :0.00000   Median :0.000000   Median : 0.00636   Median :0.012590  
##  Mean   :0.03794   Mean   :0.016851   Mean   : 0.53830   Mean   :0.032879  
##  3rd Qu.:0.01479   3rd Qu.:0.007121   3rd Qu.: 0.04837   3rd Qu.:0.028669  
##  Max.   :2.94118   Max.   :1.385417   Max.   :63.22222   Max.   :0.536325  
##    p_foreign           hispanic          white_alone      black_alone     
##  Min.   :0.000000   Min.   :0.0002701   Min.   :0.0360   Min.   :0.00000  
##  1st Qu.:0.009438   1st Qu.:0.0161756   1st Qu.:0.5790   1st Qu.:0.00861  
##  Median :0.017783   Median :0.0292859   Median :0.8060   Median :0.05525  
##  Mean   :0.033822   Mean   :0.1031399   Mean   :0.7342   Mean   :0.18233  
##  3rd Qu.:0.035351   3rd Qu.:0.0633609   3rd Qu.:0.9368   3rd Qu.:0.33606  
##  Max.   :0.399427   Max.   :0.9917444   Max.   :0.9983   Max.   :0.87226  
##  amercian_Indian_alone  asian_alone       pacific_alone      
##  Min.   :0.000000      Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.:0.001062      1st Qu.:0.001175   1st Qu.:0.0000000  
##  Median :0.003038      Median :0.003842   Median :0.0000000  
##  Mean   :0.037187      Mean   :0.005893   Mean   :0.0005395  
##  3rd Qu.:0.009817      3rd Qu.:0.007362   3rd Qu.:0.0004996  
##  Max.   :0.933240      Max.   :0.084682   Max.   :0.0156192  
##    other_race       two_or_more_race      cluster 
##  Min.   :0.000000   Min.   :0.000000   Min.   :2  
##  1st Qu.:0.001899   1st Qu.:0.009727   1st Qu.:2  
##  Median :0.006319   Median :0.016546   Median :2  
##  Mean   :0.018075   Mean   :0.021774   Mean   :2  
##  3rd Qu.:0.018914   3rd Qu.:0.025813   3rd Qu.:2  
##  Max.   :0.344488   Max.   :0.178688   Max.   :2  
## 
## $`3`
##      medinc        l_spanish         l_russian          l_korean       
##  Min.   :26045   Min.   : 0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:26686   1st Qu.: 0.4788   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :27345   Median : 0.6112   Median :0.01063   Median :0.001029  
##  Mean   :27460   Mean   : 0.6534   Mean   :0.05664   Mean   :0.020000  
##  3rd Qu.:28206   3rd Qu.: 0.7421   3rd Qu.:0.04194   3rd Qu.:0.012964  
##  Max.   :29248   Max.   :10.5000   Max.   :8.85714   Max.   :5.000000  
##    l_chinese        l_vietnamese        l_other_lang        p_e_less       
##  Min.   :0.00000   Min.   :0.0000000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.0000000   1st Qu.:0.00142   1st Qu.:0.008511  
##  Median :0.00870   Median :0.0000682   Median :0.01579   Median :0.018072  
##  Mean   :0.04220   Mean   :0.0157165   Mean   :0.07318   Mean   :0.032030  
##  3rd Qu.:0.03253   3rd Qu.:0.0124274   3rd Qu.:0.04875   3rd Qu.:0.037927  
##  Max.   :4.54545   Max.   :0.4955752   Max.   :6.21875   Max.   :0.292529  
##    p_foreign          hispanic          white_alone      black_alone      
##  Min.   :0.00000   Min.   :0.0009525   Min.   :0.1507   Min.   :0.000000  
##  1st Qu.:0.01518   1st Qu.:0.0226126   1st Qu.:0.8251   1st Qu.:0.006388  
##  Median :0.02682   Median :0.0429029   Median :0.9092   Median :0.018415  
##  Mean   :0.04401   Mean   :0.0940776   Mean   :0.8649   Mean   :0.062433  
##  3rd Qu.:0.05612   3rd Qu.:0.1029515   3rd Qu.:0.9526   3rd Qu.:0.069891  
##  Max.   :0.30641   Max.   :0.7283305   Max.   :0.9954   Max.   :0.595274  
##  amercian_Indian_alone  asian_alone       pacific_alone      
##  Min.   :0.000000      Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.:0.001930      1st Qu.:0.003510   1st Qu.:0.0000000  
##  Median :0.004002      Median :0.006407   Median :0.0002159  
##  Mean   :0.015338      Mean   :0.010656   Mean   :0.0008609  
##  3rd Qu.:0.009756      3rd Qu.:0.012666   3rd Qu.:0.0007981  
##  Max.   :0.782648      Max.   :0.196750   Max.   :0.0351223  
##    other_race       two_or_more_race     cluster 
##  Min.   :0.000000   Min.   :0.00000   Min.   :3  
##  1st Qu.:0.003378   1st Qu.:0.01477   1st Qu.:3  
##  Median :0.009378   Median :0.02081   Median :3  
##  Mean   :0.021382   Mean   :0.02447   Mean   :3  
##  3rd Qu.:0.021863   3rd Qu.:0.03047   3rd Qu.:3  
##  Max.   :0.475813   Max.   :0.17007   Max.   :3  
## 
## $`4`
##      medinc        l_spanish        l_russian          l_korean        
##  Min.   :34758   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000000  
##  1st Qu.:36266   1st Qu.:0.5295   1st Qu.:0.01517   1st Qu.:0.0003992  
##  Median :38290   Median :0.6387   Median :0.04257   Median :0.0148234  
##  Mean   :39826   Mean   :0.6221   Mean   :0.07254   Mean   :0.0277206  
##  3rd Qu.:41251   3rd Qu.:0.7251   3rd Qu.:0.09629   3rd Qu.:0.0368839  
##  Max.   :70390   Max.   :2.7113   Max.   :0.50000   Max.   :0.4549791  
##    l_chinese        l_vietnamese       l_other_lang         p_e_less      
##  Min.   :0.00000   Min.   :0.000000   Min.   : 0.00000   Min.   :0.00000  
##  1st Qu.:0.01203   1st Qu.:0.001317   1st Qu.: 0.01973   1st Qu.:0.01426  
##  Median :0.04386   Median :0.015026   Median : 0.04196   Median :0.03125  
##  Mean   :0.08001   Mean   :0.031270   Mean   : 0.11733   Mean   :0.04719  
##  3rd Qu.:0.10086   3rd Qu.:0.040162   3rd Qu.: 0.09845   3rd Qu.:0.05821  
##  Max.   :1.14146   Max.   :0.259309   Max.   :10.18090   Max.   :0.24534  
##    p_foreign          hispanic         white_alone      black_alone     
##  Min.   :0.00000   Min.   :0.003497   Min.   :0.1659   Min.   :0.00000  
##  1st Qu.:0.03245   1st Qu.:0.032491   1st Qu.:0.7344   1st Qu.:0.01094  
##  Median :0.06664   Median :0.063184   Median :0.8650   Median :0.03521  
##  Mean   :0.09353   Mean   :0.097290   Mean   :0.8135   Mean   :0.06794  
##  3rd Qu.:0.12274   3rd Qu.:0.130582   3rd Qu.:0.9281   3rd Qu.:0.09667  
##  Max.   :0.43173   Max.   :0.688263   Max.   :0.9862   Max.   :0.62672  
##  amercian_Indian_alone  asian_alone      pacific_alone         other_race     
##  Min.   :0.000000      Min.   :0.00000   Min.   :0.000e+00   Min.   :0.00000  
##  1st Qu.:0.001752      1st Qu.:0.00934   1st Qu.:2.048e-05   1st Qu.:0.00652  
##  Median :0.003019      Median :0.02552   Median :3.713e-04   Median :0.01404  
##  Mean   :0.012330      Mean   :0.04548   Mean   :2.745e-03   Mean   :0.02624  
##  3rd Qu.:0.005844      3rd Qu.:0.05470   3rd Qu.:8.703e-04   3rd Qu.:0.03037  
##  Max.   :0.512357      Max.   :0.42657   Max.   :2.727e-01   Max.   :0.51620  
##  two_or_more_race      cluster 
##  Min.   :0.003073   Min.   :4  
##  1st Qu.:0.018742   1st Qu.:4  
##  Median :0.025988   Median :4  
##  Mean   :0.031794   Mean   :4  
##  3rd Qu.:0.036374   3rd Qu.:4  
##  Max.   :0.232220   Max.   :4  
## 
## $`5`
##      medinc        l_spanish         l_russian           l_korean       
##  Min.   :29284   Min.   : 0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:30369   1st Qu.: 0.5028   1st Qu.:0.002048   1st Qu.:0.000000  
##  Median :31209   Median : 0.6264   Median :0.017469   Median :0.003615  
##  Mean   :31385   Mean   : 0.6305   Mean   :0.044419   Mean   :0.013005  
##  3rd Qu.:32224   3rd Qu.: 0.7351   3rd Qu.:0.048310   3rd Qu.:0.014192  
##  Max.   :34738   Max.   :16.0000   Max.   :1.841727   Max.   :0.536585  
##    l_chinese        l_vietnamese       l_other_lang         p_e_less       
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.003471   1st Qu.:0.009475  
##  Median :0.01028   Median :0.002494   Median :0.019176   Median :0.019689  
##  Mean   :0.03488   Mean   :0.019451   Mean   :0.058934   Mean   :0.033994  
##  3rd Qu.:0.03889   3rd Qu.:0.021348   3rd Qu.:0.056281   3rd Qu.:0.041931  
##  Max.   :0.67234   Max.   :1.000000   Max.   :1.551724   Max.   :0.322680  
##    p_foreign          hispanic          white_alone      black_alone      
##  Min.   :0.00000   Min.   :0.0003914   Min.   :0.3046   Min.   :0.000000  
##  1st Qu.:0.01752   1st Qu.:0.0267769   1st Qu.:0.8321   1st Qu.:0.005879  
##  Median :0.03314   Median :0.0487612   Median :0.9229   Median :0.013723  
##  Mean   :0.05219   Mean   :0.0885606   Mean   :0.8757   Mean   :0.052872  
##  3rd Qu.:0.06479   3rd Qu.:0.0995825   3rd Qu.:0.9582   3rd Qu.:0.057868  
##  Max.   :0.47238   Max.   :0.6645812   Max.   :0.9942   Max.   :0.623532  
##  amercian_Indian_alone  asian_alone       pacific_alone         other_race     
##  Min.   :0.000000      Min.   :0.000000   Min.   :0.0000000   Min.   :0.00000  
##  1st Qu.:0.001863      1st Qu.:0.004097   1st Qu.:0.0000000   1st Qu.:0.00394  
##  Median :0.003553      Median :0.008007   Median :0.0002134   Median :0.01018  
##  Mean   :0.009915      Mean   :0.015828   Mean   :0.0010476   Mean   :0.02078  
##  3rd Qu.:0.007727      3rd Qu.:0.016925   3rd Qu.:0.0007089   3rd Qu.:0.02183  
##  Max.   :0.386749      Max.   :0.330198   Max.   :0.1220201   Max.   :0.55425  
##  two_or_more_race     cluster 
##  Min.   :0.00000   Min.   :5  
##  1st Qu.:0.01403   1st Qu.:5  
##  Median :0.02085   Median :5  
##  Mean   :0.02389   Mean   :5  
##  3rd Qu.:0.02971   3rd Qu.:5  
##  Max.   :0.28886   Max.   :5
#table(demographics$cluster, exclude = 'nothing')

quick map

# Interactive map of the matched counties. Reuses `demographics_match` rather
# than filtering on the cluster number a second time, so the map and the
# sample below always describe the same set of counties.
demographics_match %>%
  st_sf() %>%
  mapview(zcol = "cluster")

# Draw a random sample of 40 matched counties and list them by name.
demographics_sample <- demographics_match %>%
  slice_sample(n = 40)
demographics_sample %>% select(county, state)
## # A tibble: 40 x 2
##    county                  state               
##    <chr>                   <chr>               
##  1 Juneau City and Borough Alaska              
##  2 Cumberland County       Pennsylvania        
##  3 District of Columbia    District of Columbia
##  4 Grundy County           Illinois            
##  5 Worcester County        Massachusetts       
##  6 Clarke County           Virginia            
##  7 El Dorado County        California          
##  8 Cobb County             Georgia             
##  9 Manassas Park city      Virginia            
## 10 Lander County           Nevada              
## # ... with 30 more rows